{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/hzy/anaconda3/anaconda/lib/python3.6/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "from __future__ import division\n",
    "import argparse\n",
    "import logging\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.svm import SVR\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "import xgboost\n",
    "import time\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.linear_model import Lasso\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/hzy/anaconda3/anaconda/lib/python3.6/importlib/_bootstrap.py:205: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6\n",
      "  return f(*args, **kwds)\n"
     ]
    }
   ],
   "source": [
    "from src.utils import kfold_validation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 准备工作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# df_me = pd.read_csv('./data/sourceB/shanghai_mall.csv', index_col=0)\n",
    "# X = df_me.filter(regex='f.*', axis=1).values\n",
    "# y = df_me['hotness'].values\n",
    "# y = np.asarray(df_me['hotness'], dtype=np.float64)\n",
    "# y = np.log(y)\n",
    "\n",
    "df = pd.read_csv('./data/sourceB_processed/beijing_street_processed.txt', sep='\\t',\n",
    "                names = ['lng', 'lat', 'r'] + ['f_b{}'.format(i) for i in range(19)] + ['hotness']\n",
    "                )\n",
    "df = df[df.hotness != 0]\n",
    "X = df.filter(regex='f_b.*', axis=1).values\n",
    "y = np.asarray(df['hotness'], dtype=np.float64)\n",
    "y = np.log(y)\n",
    "\n",
    "# X = StandardScaler().fit_transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.54925409236954759"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores = []\n",
    "for i in range(100):\n",
    "    score = kfold_validation(\n",
    "            X, y,\n",
    "            kfold_k=4,\n",
    "            model = LinearRegression(),\n",
    "            vis = False,\n",
    "            verbose = False,\n",
    "            use_mape=False,\n",
    "        )\n",
    "    scores.append(score)\n",
    "sum(scores) / len(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
